home *** CD-ROM | disk | FTP | other *** search
/ Chip 2007 January, February, March & April / Chip-Cover-CD-2007-02.iso / Pakiet internetowy / Rozne / HTTrack 3.40-2 / httrack-3.40-2.exe / {app} / src / htsalias.c < prev    next >
C/C++ Source or Header  |  2005-12-17  |  19KB  |  555 lines

  1. /* ------------------------------------------------------------ */
  2. /*
  3. HTTrack Website Copier, Offline Browser for Windows and Unix
  4. Copyright (C) Xavier Roche and other contributors
  5.  
  6. This program is free software; you can redistribute it and/or
  7. modify it under the terms of the GNU General Public License
  8. as published by the Free Software Foundation; either version 2
  9. of the License, or any later version.
  10.  
  11. This program is distributed in the hope that it will be useful,
  12. but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14. GNU General Public License for more details.
  15.  
  16. You should have received a copy of the GNU General Public License
  17. along with this program; if not, write to the Free Software
  18. Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19.  
  20.  
  21. Important notes:
  22.  
  23. - We hereby ask people using this source NOT to use it in purpose of grabbing
  24. emails addresses, or collecting any other private information on persons.
  25. This would disgrace our work, and spoil the many hours we spent on it.
  26.  
  27.  
  28. Please visit our Website: http://www.httrack.com
  29. */
  30.  
  31.  
  32. /* ------------------------------------------------------------ */
  33. /* File: htsalias.c subroutines:                                */
  34. /*       alias for command-line options and config files        */
  35. /* Author: Xavier Roche                                         */
  36. /* ------------------------------------------------------------ */
  37.  
  38. /* Internal engine bytecode */
  39. #define HTS_INTERNAL_BYTECODE
  40.  
  41. #include "htsbase.h"
  42. #include "htsalias.h"
  43. #include "htsglobal.h"
  44.  
  45. void linput(FILE* fp,char* s,int max);
  46. void hts_lowcase(char* s);
  47.  
  /* Map a NULL string pointer to "" so that the result can always be handed
     to a "%s" format. The argument is fully parenthesized so that compound
     expressions (e.g. "cond ? p : q") are compared as a whole.
     Note: the argument is evaluated twice; avoid expressions with side effects. */
  #define _NOT_NULL(a) ( ((a) != NULL) ? (a) : "" )
  /* Non-zero when c is a blank (' ') or one of the control whitespace
     characters 0x09..0x0d (TAB, LF, VT, FF, CR).
     The comparison uses fixed character codes only.
     Note: c is evaluated more than once. */
  #define is_realspace(c) ( \
         (c) == ' ' \
      || ( (c) >= '\x09' && (c) <= '\x0d' ) \
  )
  58.  
  // COPY OF cmdl_ins in htsmain.c
  // Insert a command in the argc/argv
  /*
    token  string to insert (copied)
    argc   number of entries currently in argv (lvalue, incremented)
    argv   array of string pointers; entries [0..argc-1] are shifted up by one
           and the new entry is stored in argv[0]
    buff   storage block receiving the copy of token
    ptr    current write offset inside buff (lvalue, advanced past the copy
           and its terminating NUL)

    The caller must guarantee room for one more argv entry and for
    strlen(token)+1 bytes at buff+ptr: nothing is checked here.
    Wrapped in do { } while(0) so that it behaves as a single statement,
    including in an unbraced if/else.
  */
  #define cmdl_ins(token,argc,argv,buff,ptr) \
    do { \
      int cmdl_ins_i_; \
      for(cmdl_ins_i_=(argc);cmdl_ins_i_>0;cmdl_ins_i_--) \
        (argv)[cmdl_ins_i_]=(argv)[cmdl_ins_i_-1]; \
      (argv)[0]=((buff)+(ptr)); \
      strcpybuff((argv)[0],(token)); \
      (ptr) += (strlen((argv)[0])+1); \
      (argc)++; \
    } while(0)
  // END OF COPY OF cmdl_ins in htsmain.c
  72.  
  73.  
  74. /*
  75.   Aliases for command-line and config file definitions
  76.   These definitions can be used:
  77.   in command line:
  78.   --sockets=8       --cache=0
  79.   --sockets 8       --cache off
  80.                     --nocache
  81.   -c8               -C0
  82.   in config file:
  83.   sockets=8         cache=0
  84.   set sockets 8     cache off
  85.  
  86. */
  87. /*
  88.   single : no options
  89.   param  : this option allows a number parameter (1, for example) and can be mixed with other options (R1C1c8)
  90.   param1 : this option must be alone, and needs one distinct parameter (-P <path>)
  91.   param0 : this option must be alone, but the parameter should be put together (+*.gif)
  92. */
  /*
    Alias table. Each entry is { alias name, real option, type, help text }:
      [0] long name usable as --name (command line) or name (config file)
      [1] option string the alias is translated to
      [2] "single", "param", "param1" or "param0" (see above)
      [3] help text, possibly empty
    The table ends with an entry whose fields are all empty strings.
    Lookups (optalias_find / optreal_find) return the FIRST matching entry, so
    when a name or a real option appears several times only the first
    occurrence is reachable through them.
  */
  const char* hts_optalias[][4] = {
    /*   {"","","",""}, */
    {"path","-O","param1","output path"},
    {"chroot","-%O","param1","default top path"},
    {"mirror","-w","single",""},
    {"mirror-wizard","-W","single",""},
    {"get-files","-g","single",""},
    {"quiet","-q","single",""},
    {"mirrorlinks","-Y","single",""},
    {"proxy","-P","param1","proxy name:port"},
    {"bind","-%b","param1","hostname to bind"},
    {"httpproxy-ftp","-%f","param",""},
    {"depth","-r","param",""},
    {"recurse-levels","-r","param",""},
    {"ext-depth","-%e","param",""},
    {"max-files","-m","param",""},
    {"max-size","-M","param",""},
    {"max-time","-E","param",""},
    {"max-mms-time","-%m","param",""},
    {"max-rate","-A","param",""},
    {"max-pause","-G","param",""},
    {"sockets","-c","param","number of simultaneous connections allowed"},
    {"socket","-c","param","number of simultaneous connections allowed"},
    {"connection","-c","param","number of simultaneous connections allowed"},
    {"connection-per-second","-%c","param","number of connection per second allowed"},
    /* type was empty: the value of --timeout=N / "timeout N" was silently dropped */
    {"timeout","-T","param",""},
    {"retries","-R","param","number of retries for non-fatal errors"},
    {"min-rate","-J","param",""},
    {"host-control","-H","param",""},
    {"extended-parsing","-%P","param",""},
    {"near","-n","single",""},
    {"delayed-type-check","-%N","single",""},
    {"cached-delayed-type-check","-%D","single",""},
    {"delayed-type-check-always","-%N2","single",""},
    {"disable-security-limits","-%!","single",""},
    {"test","-t","single",""},
    {"list","-%L","param1",""},
    {"urllist","-%S","param1",""},
    {"language","-%l","param1",""},
    {"lang","-%l","param1",""},
    {"structure","-N","param",""},
    {"user-structure","-N","param1",""},
    {"long-names","-L","param",""},
    {"keep-links","-K","param",""},
    {"mime-html","-%M","single",""},
    {"mht","-%M","single",""},
    {"replace-external","-x","single",""},
    {"disable-passwords","-%x","single",""},
    {"disable-password","-%x","single",""},
    {"include-query-string","-%q","single",""},
    {"generate-errors","-o","single",""},
    {"purge-old","-X","param",""},
    {"cookies","-b","param",""},
    {"check-type","-u","param",""},
    {"assume","-%A","param1",""},
    {"mimetype","-%A","param1",""},
    {"parse-java","-j","param",""},
    {"protocol","-@i","param",""},
    {"robots","-s","param",""},
    {"http-10","-%h","single",""},
    {"http-1.0","-%h","single",""},
    {"keep-alive","-%k","single",""},
    {"build-top-index","-%i","single",""},
    {"disable-compression","-%z","single",""},
    {"tolerant","-%B","single",""},
    {"updatehack","-%s","single",""},
    {"sizehack","-%s","single",""},
    {"urlhack","-%u","single",""},
    {"user-agent","-F","param1","user-agent identity"},
    {"referer","-%R","param1","default referer URL"},
    {"from","-%E","param1","from email address"},
    {"footer","-%F","param1",""},
    /* help text used to be a copy of the one of "retries" */
    {"cache","-C","param","create/use a cache for updates and retries"},
    {"store-all-in-cache","-k","single",""},
    {"do-not-recatch","-%n","single",""},
    {"do-not-log","-Q","single",""},
    {"extra-log","-z","single",""},
    {"debug-log","-Z","single",""},
    {"verbose","-v","single",""},
    {"file-log","-f","single",""},
    {"single-log","-f2","single",""},
    {"index","-I","single",""},
    {"search-index","-%I","single",""},
    {"priority","-p","param",""},
    {"debug-headers","-%H","single",""},
    {"userdef-cmd","-V","param1",""},
    {"callback","-%W","param1","plug an external callback"},
    {"wrapper","-%W","param1","plug an external callback"},
    {"structure","-N","param1","user-defined structure"},
    {"usercommand","-V","param1","user-defined command"},
    {"display","-%v","single","show files transfered and other funny realtime information"},
    {"dos83","-L0","single",""},
    {"iso9660","-L2","single",""},
    /* */

    /* DEPRECATED */
    {"stay-on-same-dir","-S","single","stay on the same directory - DEPRECATED"},
    {"can-go-down","-D","single","can only go down into subdirs - DEPRECATED"},
    {"can-go-up","-U","single","can only go to upper directories- DEPRECATED"},
    {"can-go-up-and-down","-B","single","can both go up&down into the directory structure - DEPRECATED"},
    {"stay-on-same-address","-a","single","stay on the same address - DEPRECATED"},
    {"stay-on-same-domain","-d","single","stay on the same principal domain - DEPRECATED"},
    {"stay-on-same-tld","-l","single","stay on the same TLD (eg: .com) - DEPRECATED"},
    {"go-everywhere","-e","single","go everywhere on the web - DEPRECATED"},

    /* Badly documented */
    {"debug-testfilters","-#0","param1","debug: test filters"},
    {"advanced-flushlogs","-#f","single",""},
    {"advanced-maxfilters","-#F","param",""},
    {"version","-#h","single",""},
    {"debug-scanstdin","-#K","single",""},
    {"advanced-maxlinks","-#L","single",""},
    {"advanced-progressinfo","-#p","single","deprecated"},
    {"catch-url","-#P","single","catch complex URL through proxy"},
    {"debug-oldftp","-#R","single",""},
    {"debug-xfrstats","-#T","single",""},
    {"advanced-wait","-#u","single",""},
    {"debug-ratestats","-#Z","single",""},
    {"exec","-#!","param1",""},
    {"fast-engine","-#X","single","Enable fast routines"},
    {"debug-overflows","-#X0","single","Attempt to detect buffer overflows"},
    {"debug-cache","-#C","param1","List files in the cache"},
    {"extract-cache","-#C","single","Extract meta-data"},
    {"debug-parsing","-#d","single","debug: test parser"},
    {"repair-cache","-#R","single","repair the damaged cache ZIP file"},
    {"repair","-#R","single",""},

    /* STANDARD ALIASES */
    {"spider","-p0C0I0t","single",""},
    {"testsite","-p0C0I0t","single",""},
    {"testlinks","-r1p0C0I0t","single",""},
    {"test","-r1p0C0I0t","single",""},
    {"bookmark","-r1p0C0I0t","single",""},
    {"mirror","-w","single",""},
    {"testscan","-p0C0I0Q","single",""},
    {"scan","-p0C0I0Q","single",""},
    {"check","-p0C0I0Q","single",""},
    {"skeleton","-p1","single",""},
    {"preserve","-%p","single",""},
    {"get","-qg","single",""},
    {"update","-iC2","single",""},
    {"continue","-iC1","single",""},
    {"restart","-iC1","single",""},
    {"continue","-i","single",""}, /* for help alias */
    {"sucker","-r999","single",""},
    {"help","-h","single",""},
    {"documentation","-h","single",""},
    {"doc","-h","single",""},
    {"wide","-c32","single",""},
    {"tiny","-c1","single",""},
    {"ultrawide","-c48","single",""},
    {"http10","-%h","single",""},
    {"filelist","-%L","single",""},
    {"list","-%L","single",""},
    {"filterlist","-%S","single",""},
    /* END OF ALIASES */

    /* Filters */
    {"allow","+","param0","allow filter"},
    {"deny","-","param0","deny filter"},
    /* */

    /* URLs */
    {"add","","param0","add URLs"},
    /* */

    /* Specific */
    /* help text used to be a copy of the one of "path" */
    {"user","-%U","param1","run the engine with another user id when called as root"},
    /* */

    /* Internal */
    {"catchurl","--catchurl","single","catch complex URL through proxy"},
    {"updatehttrack","--updatehttrack","single","update HTTrack Website Copier"},
    {"clean","--clean","single","clean up log files and cache"},
    {"tide","--clean","single","clean up log files and cache"},
    {"autotest","-#T","single",""},
    /* */

    /* End of table marker */
    {"","","",""}
  };
  253.  
  254.  
  255. /* 
  256.   Check for alias in command-line 
  257.   argc,argv     as in main()
  258.   n_arg         argument position
  259.   return_argv   a char[2][] where to put result
  260.   return_error  buffer in case of syntax error
  261.  
  262.   return value: number of arguments treated (0 if error)
  263. */
  264. int optalias_check(int argc,const char * const * argv,int n_arg,
  265.                    int* return_argc,char** return_argv,
  266.                    char* return_error) {
  267.   return_error[0]='\0';
  268.   *return_argc=1;
  269.   if (argv[n_arg][0]=='-')
  270.   if (argv[n_arg][1]=='-') {
  271.     char command[1000];
  272.     char param[1000];
  273.     char addcommand[256];
  274.     /* */
  275.     char* position;
  276.     int need_param=1;
  277.     //int return_param=0;
  278.     int pos;
  279.     command[0]=param[0]=addcommand[0]='\0';
  280.  
  281.     /* --sockets=8 */
  282.     if ( (position=strchr(argv[n_arg],'=')) ) {
  283.       /* Copy command */
  284.       strncatbuff(command,argv[n_arg]+2,(int) (position - (argv[n_arg]+2)) );
  285.       /* Copy parameter */
  286.       strcpybuff(param,position+1);
  287.     }
  288.     /* --nocache */
  289.     else if (strncmp(argv[n_arg]+2,"no",2)==0) {
  290.       strcpybuff(command,argv[n_arg]+4);
  291.       strcpybuff(param,"0");
  292.     }
  293.     /* --sockets 8 */
  294.     else {
  295.       if (strncmp(argv[n_arg]+2,"wide-",5)==0) {
  296.         strcpybuff(addcommand,"c32");
  297.         strcpybuff(command,strchr(argv[n_arg]+2,'-')+1);
  298.       } else if (strncmp(argv[n_arg]+2,"tiny-",5)==0) {
  299.         strcpybuff(addcommand,"c1");
  300.         strcpybuff(command,strchr(argv[n_arg]+2,'-')+1);
  301.       } else
  302.         strcpybuff(command,argv[n_arg]+2);
  303.       need_param=2;
  304.     }
  305.  
  306.     /* Now solve the alias */
  307.     pos=optalias_find(command);
  308.     if (pos>=0) {
  309.       /* Copy real name */
  310.       strcpybuff(command,hts_optalias[pos][1]);
  311.       /* With parameters? */
  312.       if (strncmp(hts_optalias[pos][2],"param",5)==0) {
  313.         /* Copy parameters? */
  314.         if (need_param == 2) {
  315.           if ((n_arg+1>=argc) || (argv[n_arg+1][0]=='-')) {  /* no supplemental parameter */
  316.             sprintf(return_error,
  317.               "Syntax error:\n\tOption %s needs to be followed by a parameter: %s <param>\n\t%s\n",
  318.               command,command,_NOT_NULL(optalias_help(command)));
  319.             return 0;
  320.           }
  321.           strcpybuff(param,argv[n_arg+1]);
  322.           need_param=2;
  323.         }
  324.       } else
  325.         need_param=1;
  326.  
  327.       /* Final result */
  328.  
  329.       /* Must be alone (-P /tmp) */
  330.       if (strcmp(hts_optalias[pos][2],"param1")==0) {
  331.         strcpybuff(return_argv[0],command);
  332.         strcpybuff(return_argv[1],param);
  333.         *return_argc=2;     /* 2 parameters returned */
  334.       } 
  335.       /* Alone with parameter (+*.gif) */
  336.       else if (strcmp(hts_optalias[pos][2],"param0")==0) {
  337.         /* Command */
  338.         strcpybuff(return_argv[0],command);
  339.         strcatbuff(return_argv[0],param);
  340.       }
  341.       /* Together (-c8) */
  342.       else {
  343.         /* Command */
  344.         strcpybuff(return_argv[0],command);
  345.         /* Parameters accepted */
  346.         if (strncmp(hts_optalias[pos][2],"param",5)==0) {
  347.           /* --cache=off or --index=on */
  348.           if (strcmp(param,"off")==0)
  349.             strcatbuff(return_argv[0],"0");
  350.           else if (strcmp(param,"on")==0) {
  351.             // on is the default
  352.             // strcatbuff(return_argv[0],"1");
  353.           } else
  354.             strcatbuff(return_argv[0],param);
  355.         }
  356.         *return_argc=1;     /* 1 parameter returned */
  357.       }
  358.     } else {
  359.       sprintf(return_error,"Unknown option: %s\n",command);
  360.       return 0;
  361.     }
  362.     return need_param;
  363.   }
  364.  
  365.   /* Check -O <path> */
  366.   {
  367.     int pos;
  368.     if ((pos=optreal_find(argv[n_arg]))>=0) {
  369.       if ( (strcmp(hts_optalias[pos][2],"param1")==0) || (strcmp(hts_optalias[pos][2],"param0")==0)) {
  370.         if ((n_arg+1>=argc) || (argv[n_arg+1][0]=='-')) {  /* no supplemental parameter */
  371.           sprintf(return_error,
  372.             "Syntax error:\n\tOption %s needs to be followed by a parameter: %s <param>\n\t%s\n",
  373.             argv[n_arg],argv[n_arg],_NOT_NULL(optalias_help(argv[n_arg])));
  374.           return 0;
  375.         }
  376.         /* Copy parameters */
  377.         strcpybuff(return_argv[0],argv[n_arg]);
  378.         strcpybuff(return_argv[1],argv[n_arg+1]);
  379.         /* And return */
  380.         *return_argc=2;     /* 2 parameters returned */
  381.         return 2;           /* 2 parameters used */
  382.       }
  383.     }
  384.   }
  385.   
  386.   /* Copy and return other unknown option */
  387.   strcpybuff(return_argv[0],argv[n_arg]);
  388.   return 1;
  389. }
  390.  
  391. /* Finds the <token> option alias and returns the index, or -1 if failed */
  392. int optalias_find(const char* token) {
  393.   if (token[0] != '\0') {
  394.     int i=0;
  395.     while(hts_optalias[i][0][0] != '\0') {
  396.       if (strcmp(token,hts_optalias[i][0])==0) {
  397.         return i;
  398.       }
  399.       i++;
  400.     }
  401.   }
  402.   return -1;
  403. }
  404.  
  405. /* Finds the <token> real option and returns the index, or -1 if failed */
  406. int optreal_find(const char* token) {
  407.   if (token[0] != '\0') {
  408.     int i=0;
  409.     while(hts_optalias[i][0][0] != '\0') {
  410.       if (strcmp(token,hts_optalias[i][1])==0) {
  411.         return i;
  412.       }
  413.       i++;
  414.     }
  415.   }
  416.   return -1;
  417. }
  418.  
  419. const char* optreal_value(int p) {
  420.   return hts_optalias[p][1];
  421. }
  422. const char* optalias_value(int p) {
  423.   return hts_optalias[p][0];
  424. }
  425. const char* opttype_value(int p) {
  426.   return hts_optalias[p][2];
  427. }
  428. const char* opthelp_value(int p) {
  429.   return hts_optalias[p][3];
  430. }
  431.  
  432. /* Help for option <token>, empty if not available, or NULL if unknown <token> */
  433. const char* optalias_help(const char* token) {
  434.   int pos=optalias_find(token);
  435.   if (pos>=0)
  436.     return hts_optalias[pos][3];
  437.   else
  438.     return NULL;
  439. }
  440.  
  441. /* Include a file to the current command line */
  442. /* example:
  443.   set sockets 8
  444.   index on
  445.   allow *.gif
  446.   deny ad.*
  447. */
  448. int optinclude_file(const char* name,
  449.                     int* argc,char** argv,char* x_argvblk,int* x_ptr) {
  450.   FILE* fp;
  451.   fp=fopen(name,"rb");
  452.   if (fp) {
  453.     char line[256];
  454.     int insert_after=1;       /* first, insert after program filename */
  455.     while(!feof(fp)) {
  456.       char *a,*b;
  457.       int result;
  458.       
  459.       /* read line */
  460.       linput(fp,line,250);
  461.       hts_lowcase(line);
  462.       if (strnotempty(line)) {
  463.         /* no comment line: # // ; */
  464.         if (strchr("#/;",line[0])==NULL) {
  465.           /* right trim */
  466.           a=line+strlen(line)-1;
  467.           while(is_realspace(*a)) *(a--) = '\0';
  468.           /* jump "set " and spaces */
  469.           a=line;
  470.           while(is_realspace(*a)) a++;
  471.           if (strncmp(a,"set",3)==0) {
  472.             if (is_realspace(*(a+3))) {
  473.               a+=4;
  474.             }
  475.           }
  476.           while(is_realspace(*a)) a++;
  477.           /* delete = ("sockets=8") */
  478.           if ( (b=strchr(a,'=')) )
  479.             *b=' ';
  480.           
  481.           /* isolate option and parameter */
  482.           b=a;
  483.           while( (!is_realspace(*b)) && (*b) ) b++;
  484.           if (*b) {
  485.             *b='\0';
  486.             b++;
  487.           }
  488.           /* a is now the option, b the parameter */
  489.           
  490.           {
  491.             int return_argc;
  492.             char return_error[256];
  493.             char  _tmp_argv[4][HTS_CDLMAXSIZE];
  494.             char*  tmp_argv[4];
  495.             tmp_argv[0]=_tmp_argv[0]; tmp_argv[1]=_tmp_argv[1]; tmp_argv[2]=_tmp_argv[2]; tmp_argv[3]=_tmp_argv[3];
  496.             strcpybuff(tmp_argv[0],"--");
  497.             strcatbuff(tmp_argv[0],a);
  498.             strcpybuff(tmp_argv[1],b);
  499.             
  500.             result=optalias_check(2,(const char * const *)tmp_argv,0,
  501.               &return_argc,(tmp_argv+2),
  502.               return_error);
  503.             if (!result) {
  504.               printf("%s\n",return_error);
  505.             } else {
  506.               int insert_after_argc;
  507.               /* Insert parameters BUT so that they can be in the same order */
  508.               /* temporary argc: Number of parameters after minus insert_after_argc */
  509.               insert_after_argc=(*argc)-insert_after;
  510.               cmdl_ins((tmp_argv[2]),insert_after_argc,(argv+insert_after),x_argvblk,(*x_ptr));
  511.               *argc=insert_after_argc+insert_after;
  512.               insert_after++;
  513.               /* Second one */
  514.               if (return_argc>1) {
  515.                 insert_after_argc=(*argc)-insert_after;
  516.                 cmdl_ins((tmp_argv[3]),insert_after_argc,(argv+insert_after),x_argvblk,(*x_ptr));
  517.                 *argc=insert_after_argc+insert_after;
  518.                 insert_after++;
  519.               }
  520.               /* increment to nbr of used parameters */
  521.               /* insert_after+=result; */
  522.             }
  523.           }
  524.         }
  525.         
  526.       }
  527.     }
  528.     fclose(fp);
  529.     return 1;
  530.   }
  531.   return 0;
  532. }
  533.  
  /* Home directory as given by the HOME environment variable, or "." when it
     is not set (always "." when built with _WIN32_WCE defined) */
  /* example: /home/smith */
  char* hts_gethome(void) {
    char* dir = NULL;
  #ifndef _WIN32_WCE
    dir = getenv( "HOME" );
  #endif
    return (dir != NULL) ? dir : ".";
  }
  545.  
  546. /* Convert ~/foo into /home/smith/foo */
  547. void expand_home(char* str) {
  548.   if (str[0] == '~') {
  549.     char BIGSTK tempo[HTS_URLMAXSIZE*2];
  550.     strcpybuff(tempo,hts_gethome());
  551.     strcatbuff(tempo,str+1);
  552.     strcpybuff(str,tempo);
  553.   }
  554. }
  555.